Observed log-chlorophyll at representative station for the St. Lucie Estuary

library(tidyverse)
library(lubridate)
library(mgcv)  
library(plotly)
library(WRTDStidal)
library(gridExtra)
source('R/funcs.R')

# format the data to model
data(sl_dat)
sl_mod <- sl_dat %>%
  rename(date = Date) %>% 
  mutate(
    doy = yday(date), 
    dec_time = decimal_date(date), 
    yr = year(date),
    mo = month(date, label = T)
  ) %>% 
  mutate(
    flo = sal, 
    lnchl = log(1 + chl)
    ) %>% 
  select(-sal)

# plot, all
p <- ggplot(sl_mod, aes(x = date, y = lnchl)) + 
  geom_line() +
  theme_bw() 
ggplotly(p)
# boxplot, by years
p <- ggplot(sl_mod, aes(x = yr, y = lnchl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sl_mod, aes(x = mo, y = lnchl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)

Some simple GAMs to explore annual, seasonal trends.

# annual only
mod1 <- gam(lnchl ~ s(dec_time, bs = 'tp'),
  data = sl_mod, 
  na.action = na.exclude
  )

# seasonal only
mod2 <- gam(lnchl ~ s(doy, bs = 'cc'), 
  knots = list(doy = c(1, 366)),
  data = sl_mod, 
  na.action = na.exclude
  )
 
# annual and seasonal, additive
mod3 <- gam(lnchl ~ s(dec_time, bs = 'tp') + 
  s(doy, bs = 'cc'), 
  knots = list(doy = c(1, 366)),
  data = sl_mod, 
  na.action = na.exclude
  )

# annual and seasonal, interaction
mod4 <- gam(lnchl ~ te(dec_time, doy, bs = c('tp', 'cc')),
  knots = list(doy = c(1, 366)),
  data = sl_mod, 
  na.action = na.exclude
  )

Summary stats of annual, seasonal models:

mods <- list(
  mod1 = mod1,
  mod2 = mod2, 
  mod3 = mod3,
  mod4 = mod4
  )

# smoother stats of GAMs
map(mods, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable
name smoother edf Ref.df F p.value
mod1 s(dec_time) 1.646162 2.048370 9.703867 6.78e-05
mod2 s(doy) 4.098673 8.000000 6.815467 0.00e+00
mod3 s(dec_time) 1.943078 2.429282 8.906536 5.67e-05
mod3 s(doy) 4.114540 8.000000 7.052094 0.00e+00
mod4 te(dec_time,doy) 9.994763 12.933682 5.562618 0.00e+00
# summary stats of GAMs
map(mods, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  kable
name AIC R2
mod1 683.2360 0.0500261
mod2 653.5304 0.1302894
mod3 634.5680 0.1787925
mod4 643.6646 0.1668405
pred_dat <- sl_mod

# predictions
sl_res <- pred_dat %>% 
  select(date, flo, dec_time, doy, yr) %>% 
  mutate(
    mod1 = predict(mod1, newdata = pred_dat), 
    mod2 = predict(mod2, newdata = pred_dat), 
    mod3 = predict(mod3, newdata = pred_dat), 
    mod4 = predict(mod4, newdata = pred_dat)
  ) %>% 
  tidyr::gather('mods', 'pred', -date, -doy, -dec_time, -flo, -yr)

# plot
p <- ggplot(sl_res, aes(x = date)) + 
  geom_point(data = sl_mod, aes(y = lnchl), size = 0.5) + 
  geom_line(aes(y = pred, colour = mods)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    )
ggplotly(p)
# plot
p <- ggplot(sl_res, aes(x = doy, group = factor(yr), colour = yr)) + 
  geom_line(aes(y = pred)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    ) + 
  facet_wrap(~ mods, ncol = 2)
ggplotly(p)

Adding flow data to the model:

# model, all terms additive
mod5 <- gam(lnchl ~ s(dec_time, bs = 'tp') + s(doy, bs = 'cc') + s(flo, bs = 'tp'),
  knots = list(doy = c(1, 366)),
  data = sl_mod,
  na.action = na.exclude
  )

# model, flo additive, interaction between dec_time, doy
mod6 <- gam(lnchl ~ te(dec_time, doy, bs = c('tp', 'cc')) + s(flo, bs = 'tp'),
  knots = list(doy = c(1, 366)),
  data = sl_mod,
  na.action = na.exclude
  )

# model, doy additive, interaction between dec_time and flow
mod7 <- gam(lnchl ~ te(dec_time, flo, bs = c('tp', 'tp')) + s(doy, bs = 'cc'),
  knots = list(doy = c(1, 366)),
  data = sl_mod,
  na.action = na.exclude
  )

# model, dec_time additive, interaction between flo and doy
mod8 <- gam(lnchl ~ te(flo, doy, bs = c('tp', 'cc')) + s(dec_time, bs = 'tp'),
  knots = list(doy = c(1, 366)),
  data = sl_mod,
  na.action = na.exclude
  )

# model, interaction between flow and doy, interaction between flo and dec_time
mod9 <- gam(lnchl ~ te(flo, doy, bs = c('tp', 'cc')) + te(flo, dec_time, bs = c('tp', 'tp')),
  knots = list(doy = c(1, 366)),
  data = sl_mod,
  na.action = na.exclude
  )

# model, all terms interaction
mod10 <- gam(lnchl ~ te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp')),
  knots = list(doy = c(1, 366)),
  data = sl_mod,
  na.action = na.exclude
  )

Summary stats of annual, seasonal, flow models:

mods2 <- list(
  mod5 = mod5,
  mod6 = mod6, 
  mod7 = mod7,
  mod8 = mod8,
  mod9 = mod9, 
  mod10 = mod10
  )

# smoother stats of GAMs
map(mods2, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable
name smoother edf Ref.df F p.value
mod5 s(dec_time) 2.024919 2.531950 7.173692 0.0003743
mod5 s(doy) 3.670840 8.000000 6.512722 0.0000000
mod5 s(flo) 2.821335 3.513840 4.559861 0.0025490
mod6 te(dec_time,doy) 7.580584 9.991352 6.465608 0.0000000
mod6 s(flo) 2.843758 3.538719 4.837179 0.0016166
mod7 te(dec_time,flo) 9.293442 11.920524 4.107297 0.0000058
mod7 s(doy) 3.634232 8.000000 6.279981 0.0000000
mod8 te(flo,doy) 7.669868 10.031915 7.137935 0.0000000
mod8 s(dec_time) 2.090841 2.610887 7.828485 0.0001493
mod9 te(flo,doy) 8.154234 10.577257 6.447297 0.0000000
mod9 te(flo,dec_time) 6.190206 20.000000 1.310943 0.0000112
mod10 te(dec_time,doy,flo) 27.336212 37.654333 3.674406 0.0000000
# summary stats of GAMs
map(mods2, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  kable
name AIC R2
mod5 527.8769 0.2291574
mod6 534.5420 0.2171463
mod7 522.0281 0.2536808
mod8 525.3183 0.2383925
mod9 522.0352 0.2568790
mod10 510.9187 0.3099731
# predictions
sl_res2 <- pred_dat %>% 
  select(date, flo, dec_time, doy, yr) %>% 
  mutate(
    mod5 = predict(mod5, newdata = pred_dat), 
    mod6 = predict(mod6, newdata = pred_dat), 
    mod7 = predict(mod7, newdata = pred_dat), 
    mod8 = predict(mod8, newdata = pred_dat),
    mod9 = predict(mod9, newdata = pred_dat),
    mod10 = predict(mod10, newdata = pred_dat)
  ) %>% 
  tidyr::gather('mods', 'pred', -date, -doy, -dec_time, -flo, -yr)

# plot
p <- ggplot(sl_res2, aes(x = date)) + 
  geom_point(data = sl_mod, aes(y = lnchl), size = 0.5) + 
  geom_line(aes(y = pred, colour = mods)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    )
ggplotly(p)
ptheme <- theme(
  axis.title.x = element_blank(), 
  axis.title.y = element_blank()
)
cols <- 'Spectral'
p5 <- dynagam(mod5, pred_dat, ncol = 1, col_vec = cols) + 
  ptheme + 
  theme(legend.position = 'none') +
  ggtitle('mod5')
p6 <- dynagam(mod6, pred_dat, ncol = 1, col_vec = cols) + 
  ptheme + 
  theme(legend.position = 'none') + 
  ggtitle('mod6')
p7 <- dynagam(mod7, pred_dat, ncol = 1, col_vec = cols) + 
  ptheme + 
  theme(legend.position = 'none') + 
  ggtitle('mod7')
p8 <- dynagam(mod8, pred_dat, ncol = 1, col_vec = cols) + 
  ptheme + 
  theme(legend.position = 'none') + 
  ggtitle('mod8')
p9 <- dynagam(mod9, pred_dat, ncol = 1, col_vec = cols) + 
  ptheme + 
  theme(legend.position = 'none') + 
  ggtitle('mod9')
p10 <- dynagam(mod10, pred_dat, ncol = 1, col_vec = cols) + 
  ptheme + 
  ggtitle('mod10')
pleg <- g_legend(p10)
p10 <- p10 + 
  theme(legend.position = 'none')

grid.arrange(
  pleg, 
  arrangeGrob(p5, p6, p7, p8, p9, p10, ncol = 6, bottom = 'lnQ', left = 'lnchl'), 
  ncol = 1, 
  heights = c(0.1, 1)
)

Backwards model selection, see here:

mod <- gam(lnchl ~ s(dec_time, bs = 'tp') + 
              s(doy, bs = 'cc') + 
              s(flo, bs = 'tp') +
              te(flo, doy, bs = c('tp', 'cc')) + 
              te(flo, dec_time, bs = c('tp', 'tp')) +
              te(dec_time, doy, bs = c('tp', 'cc')) +
              te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp')),
            knots = list(doy = c(1, 366)),
            data = sl_mod,
            na.action = na.exclude
)
summary(mod)
AIC(mod)

mod <- gam(lnchl ~ s(dec_time, bs = 'tp') + 
              s(doy, bs = 'cc') + 
              s(flo, bs = 'tp') +
              te(flo, doy, bs = c('tp', 'cc')) + 
              te(flo, dec_time, bs = c('tp', 'tp')) +
              te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp')),
            knots = list(doy = c(1, 366)),
            data = sl_mod,
            na.action = na.exclude
)
summary(mod)
AIC(mod)

mod <- gam(lnchl ~ s(dec_time, bs = 'tp') + 
              s(doy, bs = 'cc') + 
              te(flo, doy, bs = c('tp', 'cc')) + 
              te(flo, dec_time, bs = c('tp', 'tp')) +
              te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp')),
            knots = list(doy = c(1, 366)),
            data = sl_mod,
            na.action = na.exclude
)
summary(mod)
AIC(mod)

mod <- gam(lnchl ~ s(dec_time, bs = 'tp') + 
              s(doy, bs = 'cc') + 
              te(flo, dec_time, bs = c('tp', 'tp')) +
              te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp')),
            knots = list(doy = c(1, 366)),
            data = sl_mod,
            na.action = na.exclude
)
summary(mod)
AIC(mod)

mod <- gam(lnchl ~ s(dec_time, bs = 'tp') + 
              s(doy, bs = 'cc') + 
              te(flo, doy, bs = c('tp', 'cc')) + 
              te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp')),
            knots = list(doy = c(1, 366)),
            data = sl_mod,
            na.action = na.exclude
)
summary(mod)
AIC(mod)

mod <- gam(lnchl ~ s(dec_time, bs = 'tp') + 
              s(doy, bs = 'cc') + 
              te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp')),
            knots = list(doy = c(1, 366)),
            data = sl_mod,
            na.action = na.exclude
)
summary(mod)
AIC(mod)

mod <- gam(lnchl ~ s(doy, bs = 'cc') + 
              te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp')),
            knots = list(doy = c(1, 366)),
            data = sl_mod,
            na.action = na.exclude
)
summary(mod)
AIC(mod)